Natalia Castilla Reyes
Miguel Angel Quintero
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
import os
# Export the Java and Spark home directories as environment variables.
# NOTE(review): these paths presumably match where the commands below install
# OpenJDK 8 and unpack Spark (assuming the working directory is /content) — confirm.
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/content/spark-2.4.8-bin-hadoop2.7"
#######################SPARK
# Notebook shell commands: install headless OpenJDK 8 (output discarded),
# download the Spark 2.4.8 / Hadoop 2.7 tarball and unpack it.
!apt-get install openjdk-8-jdk-headless -qq > /dev/null
!wget -q https://downloads.apache.org/spark/spark-2.4.8/spark-2.4.8-bin-hadoop2.7.tgz
!tar xf spark-2.4.8-bin-hadoop2.7.tgz
#@title Librerias necesarias
#Librerias generales
import time
import math
import datetime
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm as tqdm
import pickle
import seaborn as sns
from matplotlib import colors
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import plotly
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
import dask
import joblib
from dask.dataframe import DataFrame as dd
import warnings
# Settings
warnings.filterwarnings("ignore")
plt.style.use('bmh')
color_pal = plt.rcParams['axes.prop_cycle'].by_key()['color']
# Use the fully qualified option name: a bare 'max_columns' pattern can match
# more than one pandas option, which set_option rejects.
pd.set_option('display.max_columns', 400)
#########Modelos
import xgboost as xgb
import lightgbm as lgb
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler(feature_range=(0, 1))
#Tensor Flow
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
# importa objetos de keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, LSTM
print("Versión de Tensorflow: ", tf.__version__)
# optimizador
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from tensorflow.keras import regularizers
from tensorflow.keras import callbacks
from tensorflow.keras.callbacks import EarlyStopping
##Para definir la función de pérdida que necesitamos en Keras
from keras import backend as K
#Variable global
DAYS_PRED = 28
ruta = "/content/drive/MyDrive/Proyecto_topicos/"
Versión de Tensorflow: 2.5.0
# Read the full dataset from Drive as a Spark DataFrame and print its row count.
# NOTE(review): `spark` is not created anywhere in the visible code — a
# SparkSession must already exist before this runs. The "header"/"encoding"
# options look like CSV-reader settings; confirm whether they have any effect
# on a Parquet read. `df` is rebound to a pandas DataFrame further down.
df = spark.read.option("header", "true").option("encoding", "latin1").parquet(ruta+"df_total.parquet")
print("Cantidad de datos en df:",df.count())
#@title Funciones globales
def _narrowest_dtype(series, candidates, limits_of, fallback):
    """Return the first dtype in ``candidates`` whose range holds all of ``series``.

    ``limits_of`` is ``np.iinfo`` or ``np.finfo``. ``fallback`` is returned when
    no candidate fits (including the all-NaN / empty case, where the
    comparisons are false).
    """
    lowest, highest = series.min(), series.max()
    for candidate in candidates:
        limits = limits_of(candidate)
        # Inclusive bounds: a value equal to the type's min/max still fits.
        if limits.min <= lowest and highest <= limits.max:
            return candidate
    return fallback


# Downcast to free memory
def downcast(df):
    """Shrink column dtypes in place to reduce the memory used by ``df``.

    Columns whose dtype name contains ``'int'`` are cast to the narrowest of
    int8/int16/int32/int64 that contains the column's min and max; columns
    whose dtype name contains ``'float'`` likewise to float16/float32/float64.
    Object columns become ``category``, except a column named ``'date'``,
    which is parsed as ``%Y-%m-%d`` datetimes. Other dtypes are left as they
    are.

    Note: float16 keeps only about three significant decimal digits, so
    in-range values can still be rounded.

    Args:
        df: pandas DataFrame; its columns are reassigned in place.

    Returns:
        The same DataFrame object, with the converted columns.
    """
    int_candidates = (np.int8, np.int16, np.int32)
    float_candidates = (np.float16, np.float32)

    for col, dtype in df.dtypes.items():
        dtype_name = str(dtype)
        if 'int' in dtype_name:
            target = _narrowest_dtype(df[col], int_candidates, np.iinfo, np.int64)
            df[col] = df[col].astype(target)
        elif 'float' in dtype_name:
            target = _narrowest_dtype(df[col], float_candidates, np.finfo, np.float64)
            df[col] = df[col].astype(target)
        elif dtype == object:
            # The builtin ``object`` replaces the ``np.object`` alias, which
            # no longer exists in current NumPy releases.
            if col == 'date':
                df[col] = pd.to_datetime(df[col], format='%Y-%m-%d')
            else:
                df[col] = df[col].astype('category')
    return df
def introduce_nulls(df, color_map=False):
    """Return ``df`` with one row per calendar day between its first and last date.

    The frame is re-indexed on a daily range built from the ``date`` column, so
    days missing from the input appear as rows whose other columns are NaN.
    The result has a fresh integer index and ``date`` as its first column.

    Args:
        df: DataFrame with a datetime ``date`` column. It is not modified.
        color_map: Kept for backward compatibility. Callers used it to say that
            ``df`` already contains an ``index`` column; the index is now named
            explicitly, so both values give the same result.

    Returns:
        A new DataFrame covering every day in the range. An empty input is
        returned as an (empty) copy, since no date range can be built from it.
    """
    if df.empty:
        return df.copy()

    days = df['date'].dt.normalize()
    all_days = pd.date_range(days.min(), days.max())
    filled = df.set_index('date').reindex(all_days)
    # Naming the index before reset_index() always yields a 'date' column,
    # whether or not the frame already has a column called 'index'.
    return filled.rename_axis('date').reset_index()
def plot_metric(df, state, store, metric, color_map):
    """Build a Plotly time series of a daily metric for one store, split by SNAP flag.

    Rows are restricted to ``state``/``store`` and dates up to 2016-05-22, then
    summed per day and per value of the ``snap_<state>`` column. Four traces
    are added, in this order: all categories (non-SNAP, SNAP) and the FOODS
    category only (non-SNAP, SNAP). Days absent from a series are inserted as
    gaps through ``introduce_nulls``.

    Args:
        df: DataFrame with ``state_id``, ``store_id``, ``cat_id``, ``date``,
            ``sold``, ``revenue`` and ``snap_<state>`` columns.
        state: State code used for filtering and for the SNAP column name.
        store: Store identifier; also shown in the figure title.
        metric: Column to plot, ``'sold'`` or ``'revenue'``.
        color_map: Unused; kept so existing calls keep working.

    Returns:
        The ``plotly.graph_objects.Figure``.
    """
    snap_col = 'snap_' + state
    in_store = (df['state_id'] == state) & (df['store_id'] == store) & (df['date'] <= '2016-05-22')
    store_rows = df[in_store]
    food_rows = store_rows[store_rows['cat_id'] == 'FOODS']

    fig = go.Figure()
    for label, rows in (('Total ', store_rows), ('Food ', food_rows)):
        # Select the columns with a list: tuple-style ['sold', 'revenue']
        # indexing on a groupby is rejected by current pandas.
        daily = rows.groupby(['date', snap_col], as_index=False)[['sold', 'revenue']].sum()
        for flag, suffix in ((0, '(Non-SNAP)'), (1, '(SNAP)')):
            series = introduce_nulls(daily[daily[snap_col] == flag])
            fig.add_trace(go.Scatter(x=series['date'], y=series[metric],
                                     name=label + metric + suffix))

    y_title = 'Total de productos vendidos' if metric == 'sold' else 'Total de ingresos($)'
    fig.update_yaxes(title_text=y_title)

    selector_buttons = [
        dict(count=1, label="1m", step="month", stepmode="backward"),
        dict(count=6, label="6m", step="month", stepmode="backward"),
        dict(count=1, label="YTD", step="year", stepmode="todate"),
        dict(count=1, label="1y", step="year", stepmode="backward"),
        dict(count=2, label="2y", step="year", stepmode="backward"),
        dict(step="all"),
    ]
    fig.update_layout(
        template='seaborn',
        title='Tienda: ' + store,
        xaxis=dict(
            # Initial visible window; the range slider still spans all the data.
            range=['2014-01-01', '2016-05-22'],
            rangeselector=dict(buttons=selector_buttons),
            rangeslider=dict(autorange=True),
            type="date",
        ),
    )
    return fig
def calmap(cal_data, state, store, scale, color_map):
    """Build a calendar heat map of units sold for one store, one subplot row per year.

    Args:
        cal_data: DataFrame with ``state_id``, ``store_id``, ``year``, ``date``,
            ``week``, ``day_name`` and ``sold`` columns.
        state: State code to filter on.
        store: Store identifier to filter on; also shown in the title.
        scale: Plotly colour scale applied to the shared colour axis.
        color_map: Forwarded unchanged to ``introduce_nulls``.

    Returns:
        The figure holding one heat map (week x day name) per year.
    """
    store_mask = (cal_data['state_id'] == state) & (cal_data['store_id'] == store)
    store_data = cal_data[store_mask]
    year_values = store_data.year.unique().tolist()

    fig = make_subplots(rows=len(year_values), cols=1, shared_xaxes=True,
                        vertical_spacing=0.005)
    for row_number, year in enumerate(year_values, start=1):
        # Fill in the days missing from this year so they show up as gaps.
        yearly = introduce_nulls(store_data[store_data['year'] == year], color_map)
        heatmap = go.Heatmap(
            z=yearly.sold,
            x=yearly.week,
            y=yearly.day_name,
            hovertext=yearly.date.dt.date,
            coloraxis="coloraxis",
            name=year,
        )
        fig.add_trace(heatmap, row_number, 1)
        fig.update_yaxes(title_text=year, tickfont=dict(size=5), row=row_number, col=1)

    fig.update_xaxes(range=[1, 53], tickfont=dict(size=10), nticks=53)
    fig.update_layout(coloraxis={'colorscale': scale})
    fig.update_layout(template='seaborn', title='Tienda:' + ' ' + store)
    return fig
Cargando la información procesada:
# Load the processed dataset from Drive into pandas (rebinds `df`).
df = pd.read_parquet(ruta + "df_total_aux_1.parquet")
# Preview: rows of store CA_3 from 2015 onwards.
df.loc[(df['store_id'] == 'CA_3') & (df['year'] >= 2015)]
| store_id | item_id | wm_yr_wk | d | id | dept_id | cat_id | state_id | sold | date | weekday | wday | month | year | event_name_1 | event_type_1 | event_name_2 | event_type_2 | snap_CA | snap_TX | snap_WI | sell_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1179611 | CA_3 | FOODS_1_001 | 11502 | d_1471 | FOODS_1_001_CA_3_evaluation | FOODS_1 | FOODS | CA | 0.0 | 2015-02-07 | Saturday | 1 | 2 | 2015.0 | None | None | None | None | 1 | 1 | 0 | 2.24 |
| 1179612 | CA_3 | FOODS_1_001 | 11502 | d_1472 | FOODS_1_001_CA_3_evaluation | FOODS_1 | FOODS | CA | 1.0 | 2015-02-08 | Sunday | 2 | 2 | 2015.0 | None | None | None | None | 1 | 0 | 1 | 2.24 |
| 1179613 | CA_3 | FOODS_1_001 | 11502 | d_1473 | FOODS_1_001_CA_3_evaluation | FOODS_1 | FOODS | CA | 0.0 | 2015-02-09 | Monday | 3 | 2 | 2015.0 | None | None | None | None | 1 | 1 | 1 | 2.24 |
| 1179614 | CA_3 | FOODS_1_001 | 11502 | d_1474 | FOODS_1_001_CA_3_evaluation | FOODS_1 | FOODS | CA | 1.0 | 2015-02-10 | Tuesday | 4 | 2 | 2015.0 | None | None | None | None | 1 | 0 | 0 | 2.24 |
| 1179615 | CA_3 | FOODS_1_001 | 11502 | d_1475 | FOODS_1_001_CA_3_evaluation | FOODS_1 | FOODS | CA | 2.0 | 2015-02-11 | Wednesday | 5 | 2 | 2015.0 | None | None | None | None | 0 | 1 | 1 | 2.24 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 55036191 | CA_3 | HOUSEHOLD_2_516 | 11613 | d_1914 | HOUSEHOLD_2_516_CA_3_evaluation | HOUSEHOLD_2 | HOUSEHOLD | CA | 0.0 | 2016-04-25 | Monday | 3 | 4 | 2016.0 | None | None | None | None | 0 | 0 | 0 | 5.94 |
| 55036192 | CA_3 | HOUSEHOLD_2_516 | 11613 | d_1915 | HOUSEHOLD_2_516_CA_3_evaluation | HOUSEHOLD_2 | HOUSEHOLD | CA | 2.0 | 2016-04-26 | Tuesday | 4 | 4 | 2016.0 | None | None | None | None | 0 | 0 | 0 | 5.94 |
| 55036193 | CA_3 | HOUSEHOLD_2_516 | 11613 | d_1916 | HOUSEHOLD_2_516_CA_3_evaluation | HOUSEHOLD_2 | HOUSEHOLD | CA | 0.0 | 2016-04-27 | Wednesday | 5 | 4 | 2016.0 | None | None | None | None | 0 | 0 | 0 | 5.94 |
| 55036194 | CA_3 | HOUSEHOLD_2_516 | 11613 | d_1917 | HOUSEHOLD_2_516_CA_3_evaluation | HOUSEHOLD_2 | HOUSEHOLD | CA | 0.0 | 2016-04-28 | Thursday | 6 | 4 | 2016.0 | None | None | None | None | 0 | 0 | 0 | 5.94 |
| 55036195 | CA_3 | HOUSEHOLD_2_516 | 11613 | d_1918 | HOUSEHOLD_2_516_CA_3_evaluation | HOUSEHOLD_2 | HOUSEHOLD | CA | 0.0 | 2016-04-29 | Friday | 7 | 4 | 2016.0 | None | None | None | None | 0 | 0 | 0 | 5.94 |
1548892 rows × 22 columns
# `df` already holds df_total_aux_1.parquet (loaded just above and only
# filtered for display since), so the file is not read a second time.
print("dimensiòn del dataframe:", df.shape)
dimensiòn del dataframe: (26617770, 22)
# Adjust the type of some variables
# Memory footprint in MB, measured before any conversion.
df_bd = np.round(df.memory_usage().sum() / (1024 * 1024), 1)
column_types = {
    'wday': int,
    'year': float,
    'month': int,
    'snap_CA': int,
    'snap_TX': int,
    'snap_WI': int,
    'sell_price': float,
    'sold': float,
}
# Cast one column at a time, in the listed order.
for column_name, target_type in column_types.items():
    df[column_name] = df[column_name].astype(target_type)
df['date'] = pd.to_datetime(df['date'])
df.head()
| store_id | item_id | wm_yr_wk | d | id | dept_id | cat_id | state_id | sold | date | weekday | wday | month | year | event_name_1 | event_type_1 | event_name_2 | event_type_2 | snap_CA | snap_TX | snap_WI | sell_price | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 133 | CA_1 | FOODS_1_001 | 11403 | d_1114 | FOODS_1_001_CA_1_evaluation | FOODS_1 | FOODS | CA | 0.0 | 2014-02-15 | Saturday | 1 | 2 | 2014.0 | None | None | None | None | 0 | 1 | 1 | 2.24 |
| 134 | CA_1 | FOODS_1_001 | 11403 | d_1115 | FOODS_1_001_CA_1_evaluation | FOODS_1 | FOODS | CA | 2.0 | 2014-02-16 | Sunday | 2 | 2 | 2014.0 | None | None | None | None | 0 | 0 | 0 | 2.24 |
| 135 | CA_1 | FOODS_1_001 | 11403 | d_1116 | FOODS_1_001_CA_1_evaluation | FOODS_1 | FOODS | CA | 1.0 | 2014-02-17 | Monday | 3 | 2 | 2014.0 | PresidentsDay | National | None | None | 0 | 0 | 0 | 2.24 |
| 136 | CA_1 | FOODS_1_001 | 11403 | d_1117 | FOODS_1_001_CA_1_evaluation | FOODS_1 | FOODS | CA | 0.0 | 2014-02-18 | Tuesday | 4 | 2 | 2014.0 | None | None | None | None | 0 | 0 | 0 | 2.24 |
| 137 | CA_1 | FOODS_1_001 | 11403 | d_1118 | FOODS_1_001_CA_1_evaluation | FOODS_1 | FOODS | CA | 0.0 | 2014-02-19 | Wednesday | 5 | 2 | 2014.0 | None | None | None | None | 0 | 0 | 0 | 2.24 |
df.info()
<class 'pandas.core.frame.DataFrame'> Int64Index: 26617770 entries, 133 to 59181089 Data columns (total 22 columns): # Column Dtype --- ------ ----- 0 store_id object 1 item_id object 2 wm_yr_wk object 3 d object 4 id object 5 dept_id object 6 cat_id object 7 state_id object 8 sold float64 9 date datetime64[ns] 10 weekday object 11 wday int64 12 month int64 13 year float64 14 event_name_1 object 15 event_type_1 object 16 event_name_2 object 17 event_type_2 object 18 snap_CA int64 19 snap_TX int64 20 snap_WI int64 21 sell_price float64 dtypes: datetime64[ns](1), float64(3), int64(5), object(13) memory usage: 4.6+ GB
#Proceso para liberar memoria
%%time
df = downcast(df)
df_ad = np.round(df.memory_usage().sum()/(1024*1024),1)
dic = {'DataFrame':['DataFrame total'],
'Antes DWNC':[df_bd],
'Después DWNC':[df_ad]}
memory = pd.DataFrame(dic)
memory = pd.melt(memory, id_vars='DataFrame', var_name='Status', value_name='Memory (MB)')
memory.sort_values('Memory (MB)',inplace=True)
fig = px.bar(memory, x='DataFrame', y='Memory (MB)', color='Status', barmode='group', text='Memory (MB)')
fig.update_traces(texttemplate='%{text} MB', textposition='outside')
fig.update_layout(template='seaborn', title='Efecto del proceso de DWNC')
plotly.offline.plot(fig, filename = ruta+'/EDA_HTMLS/reduccion_memoria.html', auto_open=False)
fig.show()
CPU times: user 29 s, sys: 2.3 s, total: 31.3 s Wall time: 31.3 s
Clasificación de la información
# Row count per state / store / category / department, shown as a treemap.
hierarchy_keys = ['state_id', 'store_id', 'cat_id', 'dept_id']
spanish_labels = {
    'state_id': 'Estado',
    'store_id': 'Tienda',
    'cat_id': 'Categoría',
    'dept_id': 'Departamento',
    'item_id': 'Cantidad',
}
group = df.groupby(hierarchy_keys, as_index=False)['item_id'].count().dropna()
# Constant column used as the single root node of the treemap.
group['USA'] = 'USA'
group.rename(columns=spanish_labels, inplace=True)
fig = px.treemap(
    group,
    path=['USA', 'Estado', 'Tienda', 'Categoría', 'Departamento'],
    values='Cantidad',
    color='Cantidad',
    color_continuous_scale=px.colors.sequential.Sunset,
    title='Walmart: Distribución de Productos',
)
fig.update_layout(template='seaborn')
plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/explicacion_distribucion_datos.html', auto_open=False)
fig.show(renderer="colab")
Estudio de la distribución de los precios
# Mean selling price of each item per store, drawn as one violin per store.
mean_price = df.groupby(['state_id', 'store_id', 'item_id'])['sell_price'].mean()
group_price_store = mean_price.dropna().reset_index()
fig = px.violin(
    group_price_store,
    x='store_id',
    color='state_id',
    y='sell_price',
    box=True,
    hover_name='item_id',
    labels={"state_id": "Estado"},
)
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Precios de Venta($)')
fig.update_layout(template='seaborn', title='Distribución de los precios por tienda')
plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/precios_por_tienda.html', auto_open=False)
fig.show()
Notas sobre la distribución de precios por tienda:
Análisis de la distribución de precios por categoría
# Mean selling price of each item per store and category, one violin per pair.
mean_price_by_cat = df.groupby(['store_id', 'cat_id', 'item_id'], as_index=False)['sell_price'].mean()
# reset_index() is kept so the resulting frame has the same columns as before.
group_price_cat = mean_price_by_cat.dropna().reset_index()
fig = px.violin(
    group_price_cat,
    x='store_id',
    color='cat_id',
    y='sell_price',
    box=True,
    hover_name='item_id',
    labels={"cat_id": "Categoría"},
)
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Precio de Venta($)')
fig.update_layout(template='seaborn', title='Distribución de los precios por categoría',)
plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/precios_x_categoria.html', auto_open=False)
fig.show()
Notas sobre el gráfico:
Análisis de la cantidad de productos vendidos por tienda
# Total units sold per (year, date, state, store); `group` is reused by the
# time-series cell and copied into `cal_data` further down.
# NOTE(review): with as_index=False the trailing reset_index() presumably adds
# an extra 'index' column. The 'level_0' rename in introduce_nulls(color_map=True)
# appears to depend on that column being present — confirm before removing it.
group = pd.DataFrame(df.groupby(['year','date','state_id','store_id'], as_index=False)['sold'].sum().dropna()).reset_index()
# One violin per store, coloured by state, showing the spread of daily totals.
fig = px.violin(group, x='store_id', color='state_id', y='sold',box=True, labels={"state_id": "Estado"})
fig.update_xaxes(title_text='Tienda')
fig.update_yaxes(title_text='Total de productos vendidos')
fig.update_layout(template='seaborn',title='Distribución de productos vendidos por tienda')
# Save a standalone HTML copy, then render inline.
plotly.offline.plot(fig, filename = ruta+'/EDA_HTMLS/cantidad_productos_tienda.html', auto_open=False)
fig.show()
Análisis de los productos vendidos respecto al tiempo transcurrido
#@title code
# Daily units sold per store, stacked in three panels (one per state) that
# share a single date axis with a range selector and a range slider.
fig = go.Figure()
title = 'Productos vendidos en cada tienda respecto al tiempo de estudio'
years = group.year.unique().tolist()  # not used in this cell
buttons = []  # not used in this cell

# Panel of each state, top to bottom: trace axis reference, layout key and
# vertical domain. Looking the panel up by state name keeps every trace under
# its own axis title regardless of the order in which the states appear.
state_panels = {
    'CA': ('y3', 'yaxis3', [0.66, 1]),
    'TX': ('y2', 'yaxis2', [0.33, 0.66]),
    'WI': ('y', 'yaxis', [0, 0.33]),
}

for state in group.state_id.unique().tolist():
    axis_ref = state_panels[state][0]
    group_state = group[group['state_id'] == state]
    for store in group_state.store_id.unique().tolist():
        group_state_store = group_state[group_state['store_id'] == store]
        fig.add_trace(go.Scatter(name=store,
                                 x=group_state_store['date'],
                                 y=group_state_store['sold'],
                                 showlegend=True,
                                 yaxis=axis_ref))

selector_buttons = [
    dict(count=1, label="1m", step="month", stepmode="backward"),
    dict(count=6, label="6m", step="month", stepmode="backward"),
    dict(count=1, label="YTD", step="year", stepmode="todate"),
    dict(count=1, label="1y", step="year", stepmode="backward"),
    dict(count=2, label="2y", step="year", stepmode="backward"),
    dict(step="all"),
]

# The three y axes differ only in domain and title.
state_axes = {
    layout_key: dict(
        anchor="x",
        autorange=True,
        domain=domain,
        mirror=True,
        showline=True,
        side="left",
        tickfont={"size": 10},
        tickmode="auto",
        ticks="",
        # title.text / title.font replace the deprecated `titlefont` attribute.
        title=dict(text=state_name, font={"size": 20}),
        type="linear",
        zeroline=False,
    )
    for state_name, (_, layout_key, domain) in state_panels.items()
}

fig.update_layout(
    xaxis=dict(
        range=['2014-01-01', '2016-05-22'],
        rangeselector=dict(buttons=selector_buttons),
        rangeslider=dict(autorange=True),
        type="date",
    ),
    **state_axes,
)
fig.update_layout(template='seaborn', title=title)
plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/productos_vendidos_tienda_por_fecha.html', auto_open=False)
fig.show()
En esta sección se analizarán las ventas y las ganancias de cada una de las tiendas. Se realizarán tres gráficos por tienda; los detalles de cada uno se encuentran a continuación:
Análisis de las ventas diarias de cada tienda.
Análisis de las ganancias diarias de cada tienda.
Mapa de calor con las ventas diarias.
Primero definimos la variable ingresos
# Revenue = units sold x selling price. Multiply in float32: downcast() stores
# float columns as float16 whenever their range fits, and a float16 product
# keeps only about three significant digits.
df['revenue'] = df['sold'].astype(np.float32) * df['sell_price'].astype(np.float32)

# Calendar data for the heat maps: daily totals up to 2016-05-22 (ISO date
# literal, so the cut-off cannot be misread as month-first), plus the ISO week
# number and the weekday name. `date` is already datetime64 (the .dt accessor
# is used on it), so it is not parsed again.
cal_data = group[group.date <= '2016-05-22'].copy()
# isocalendar().week replaces the removed `dt.weekofyear` accessor.
cal_data['week'] = cal_data.date.dt.isocalendar().week.astype(int)
cal_data['day_name'] = cal_data.date.dt.day_name()
# For every store, build three figures — daily units sold, daily revenue and
# a calendar heat map of units sold — save each one as HTML under EDA_HTMLS/
# and display it inline.
stores_by_state = {
    'CA': ['CA_1', 'CA_2', 'CA_3', 'CA_4'],
    'TX': ['TX_1', 'TX_2', 'TX_3'],
    'WI': ['WI_1', 'WI_2', 'WI_3'],
}

for state_name, store_names in stores_by_state.items():
    for store_name in store_names:
        # Units sold
        fig = plot_metric(df, state_name, store_name, 'sold', color_map=False)
        plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/total_productos_' + store_name + '.html', auto_open=False)
        fig.show()

        # Revenue analysis
        fig = plot_metric(df, state_name, store_name, 'revenue', color_map=False)
        plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/ingresos_productos_' + store_name + '.html', auto_open=False)
        fig.show()

        # Heat map — Z: units sold; X: week of the year
        fig = calmap(cal_data, state_name, store_name, 'magma', color_map=True)
        plotly.offline.plot(fig, filename=ruta + '/EDA_HTMLS/color_map_' + store_name + '.html', auto_open=False)
        fig.show()